suppressMessages(library ("tidyverse"))
library(ggplot2)
library(ggmap)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
library(broom)
library(robustlmm)
## Warning: package 'robustlmm' was built under R version 3.4.3
library(stringr)
library(tibble)
library(lattice)
library(latticeExtra)
## Loading required package: RColorBrewer
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.4.3
childata<-read.csv("C:/Users/kay/Dropbox/BST-260 project/chl data.csv")
#child<-read.sas7bdat("C:/Users/Kay/Dropbox/BST-260 project/chl.sas7bdat")

#grouping survey years in 3 groups and in 2 groups
childuse<-childata%>%
  mutate(myear= case_when(
    between(year, 2000, 2005) ~ "2000-2005",
    between(year, 2006, 2010)  ~ "2006-2010",
    between(year, 2011, 2016) ~ "2011-2016"
  ))

childouse<-childata%>%
  mutate(myear= case_when(
    between(year, 2000, 2010) ~ "2000-2010",
    between(year, 2011, 2016) ~ "2011-2016"
  ))
#Trend of HB over time
line<-childuse%>%
  filter(!is.na(hb<200&hb>30))%>%
  group_by(year)%>%
  summarise(hbs=median(hb, na.rm=TRUE))%>%
  #summarise(hbs=sum(hb<100)/length(hb))%>%
  ungroup()%>%
  ggplot(aes(year, hbs)) +
 geom_point()+
  geom_smooth()

line
## `geom_smooth()` using method = 'loess'

#Box plots of hb values by country in year groups with logarithmic conversion of of the hb
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
childuse%>%
  filter(hb<200&hb>30)%>%
  group_by(myear)%>% 
  mutate(country = reorder(country, hb)) %>%
  ungroup()%>%
  ggplot() +
  geom_boxplot(aes(country, hb,fill = factor(myear))) +
  scale_y_log10() +
  coord_flip()+ 
  theme_fivethirtyeight()+
  facet_wrap(~myear)

  childouse%>%
  filter(hb<200&hb>30)%>%
  group_by(myear)%>%
  ungroup()%>%
  ggplot() +
  geom_boxplot(aes(country, hb,fill = factor(myear))) +
  scale_y_log10() + 
     coord_flip()+ 
    theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  facet_wrap(~myear)

childuse%>%
  filter(hb<200&hb>30)%>%
  group_by(myear)%>%
  ungroup()%>%
  ggplot(aes(country, hb))+ 
  geom_jitter(width = 0.1, alpha = 0.2) +  
  theme(axis.text.y = element_text(angle = 90, hjust = 1)) +
    scale_y_log10() +
  theme_economist_white()+
    facet_wrap(~myear)

#bar chart showing percentage of children anemic by country
snapshot<-  childuse%>%
  filter(!is.na(hb<200&hb>30))%>%
  group_by(country)%>%
  summarise(hbs=sum(hb<100)/length(hb))%>%
  mutate(country = reorder(country, hbs)) %>%
  ungroup()%>%
  ggplot(aes(country, hbs*100)) +
 geom_bar(stat="identity") +
  coord_flip() +
  xlab("")

bar_percent<-childuse%>%
  filter(!is.na(hb<200&hb>30))%>%
  group_by(myear, country)%>%
  summarise(hbs=sum(hb<100)/length(hb))%>%
  mutate(country = reorder(country, hbs)) %>%
  ungroup()%>%
  ggplot(aes(country, hbs*100)) +
 geom_bar(stat="identity") +
  coord_flip() +
  xlab("")+
  facet_wrap(~myear)
#Maps showing average hb per country 
worldMap <- map_data("world") 
contname<-read.csv("C:/Users/Kay/Dropbox/Surface Desktop/list-african-countries-dependent-territory-286j.csv")#getting country names
Africa<-worldMap%>%filter(region%in%contname$country)%>%mutate(country=region)#subsetting the world map to only african countries using the vector of african countries created above

neweruse<-childuse%>%group_by(country,myear)%>%mutate(hb=as.numeric(hb))%>%summarise(hbs=mean(hb, na.rm=TRUE))
newuse<-right_join(Africa, neweruse, by="country")
## Warning: Column `country` joining character vector and factor, coercing
## into character vector
ditch_the_axes <- theme(
  axis.text = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank()
  )
ca_base <- ggplot(data = Africa, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1) + 
  geom_polygon(color = "black", fill = "gray")

elbow_room1 <- ca_base + 
      geom_polygon(data = newuse, aes(fill = hbs), color = "white") +
      geom_polygon(color = "red", fill = NA)+
  theme_classic()+
  facet_wrap(~myear)+
  geom_text(aes(label=country), size = 3)+
  ditch_the_axes

ggplotly(elbow_room1) %>% config(displayModeBar = FALSE) %>% config( showlink = FALSE)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
neweruse<-childuse%>%group_by(country)%>%mutate(hb=as.numeric(hb))%>%summarise(hbs=mean(hb, na.rm=TRUE))
newuse<-right_join(Africa, neweruse, by="country")
## Warning: Column `country` joining character vector and factor, coercing
## into character vector
ca_base <- ggplot(data = Africa, mapping = aes(x = long, y = lat, group = group)) + 
  coord_fixed(1) + 
  geom_polygon(color = "black", fill = "gray")

elbow_room1 <- ca_base + 
      geom_polygon(data = newuse, aes(fill = hbs), color = "white") +
      geom_polygon(color = "red", fill = NA)+
  theme_classic()+
  ditch_the_axes

k<-ggplotly(elbow_room1) %>% config(displayModeBar = FALSE) %>% config( showlink = FALSE)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#Regression
childregress<-childuse%>%
  mutate(year=as.factor(year), 
         myear=as.factor(myear), 
         country=as.factor(country),
         totalwt=sum(as.numeric(svyweight), na.rm=TRUE),
         weight=svyweight/totalwt,
        klust=paste(country, cluster, year, sep=" "),
        klust=as.factor(klust))


reg<-lmer(hb ~ country + (1 | klust)+age + wealthindex + has_bednet + livechl + Number.of.children.5.and.under, 
          data=childregress, 
          weights=weight, 
          REML=FALSE)

parta=tidy(reg)
  
partb<-confint(reg, 
               parm =c("age","wealthindex","has_bednet","livechl","Number.of.children.5.and.under"), 
               level=0.95)
## Computing profile confidence intervals ...
## Warning in profile.merMod(object, which = parm, signames = oldNames, ...):
## non-monotonic profile for age
## Warning in profile.merMod(object, which = parm, signames = oldNames, ...):
## non-monotonic profile for wealthindex
## Warning in optwrap(optimizer, par = thopt, fn = mkdevfun(rho, 0L), lower
## = fitted@lower): convergence code 3 from bobyqa: bobyqa -- a trust region
## step failed to reduce q
## Warning in profile.merMod(object, which = parm, signames = oldNames, ...):
## non-monotonic profile for has_bednet
## Warning in profile.merMod(object, which = parm, signames = oldNames, ...):
## non-monotonic profile for livechl
## Warning in profile.merMod(object, which = parm, signames = oldNames, ...):
## non-monotonic profile for Number.of.children.5.and.under
## Warning in if (parm == "theta_") {: the condition has length > 1 and only
## the first element will be used
## Warning in if (parm == "beta_") {: the condition has length > 1 and only
## the first element will be used
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
## age: falling back to linear interpolation
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
## wealthindex: falling back to linear interpolation
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
## has_bednet: falling back to linear interpolation
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
## livechl: falling back to linear interpolation
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
## Number.of.children.5.and.under: falling back to linear interpolation
partc<-data.frame(partb, fix.empty.names = TRUE)


part<-data.frame(parta[31:35,1:4], partc[,1:2], row.names = NULL)

kable(part, 
       caption = "Estimates and confidence intervals")%>%kable_styling()
## Currently generic markdown table using pandoc is not supported.
Estimates and confidence intervals
term estimate std.error statistic X2.5.. X97.5..
age 0.2151484 0.0026919 79.925408 0.2151484 0.2151547
wealthindex 1.2195821 0.0388218 31.414843 1.2195820 1.2196709
has_bednet 0.8266771 0.1072061 7.711102 0.8266769 0.8269203
livechl 0.1017296 0.0220738 4.608604 0.1017296 0.1017797
Number.of.children.5.and.under -0.2377546 0.0361338 -6.579845 -0.2377547 -0.2376729